from pyspark import SparkConf, SparkContext

if __name__ == '__main__':
    # Demo of RDD.intersection: compute the elements common to two RDDs.

    # Build the SparkConf object (application name "test", local master).
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    # Create the SparkContext, the entry point to the Spark execution environment.
    sc = SparkContext(conf=conf)

    try:
        rdd = sc.parallelize([("a", 1), ("b", 1)])
        rdd2 = sc.parallelize([("a", 1), ("b", 2)])

        # intersection: returns the elements present in both RDDs. Elements
        # are compared as whole values, so ("b", 1) and ("b", 2) do not match
        # and only ("a", 1) is shared by the two inputs.
        rdd3 = rdd.intersection(rdd2)

        print(rdd3.collect())
    finally:
        # Always shut the context down, even if the job above raises, so the
        # resources held by the SparkContext are released.
        sc.stop()
